package spider.hysrlzy;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import spider.Capturer;

import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * URL capturer: downloads a page and gathers the {@code href} values of the
 * anchors matched by a fixed CSS query.
 */
public class UserUrlCapturer implements Capturer {
    /** Timeout argument handed to {@code Jsoup.parse(URL, int)}. */
    private static final int FETCH_TIMEOUT = 5000;

    /** CSS query choosing the anchors whose links are collected. */
    private static final String LINK_QUERY = ".sNewList12>a,.seaList12>a";

    /** Name of the attribute read from every matched element. */
    private static final String LINK_ATTRIBUTE = "href";

    /**
     * Fetches and parses the document at {@code url}, then returns the
     * {@code href} attribute value of every element matched by the link query.
     *
     * @param url address of the page to fetch
     * @return a new {@link HashSet} holding the attribute values exactly as
     *         {@code attr("href")} reports them; if an {@link IOException} is
     *         raised while building the URL or fetching/parsing the page, the
     *         stack trace is printed and {@link Collections#emptySet()} is
     *         returned instead
     */
    @Override
    public Set<String> capture(String url) {
        try {
            Document page = Jsoup.parse(new URL(url), FETCH_TIMEOUT);
            return collectLinks(page.select(LINK_QUERY));
        } catch (IOException e) {
            e.printStackTrace();
            // Fall back to an empty set when the page could not be loaded.
            return Collections.emptySet();
        }
    }

    /** Copies the link attribute of each matched element into a fresh set. */
    private static Set<String> collectLinks(Elements anchors) {
        Set<String> links = new HashSet<>();
        for (int idx = 0, count = anchors.size(); idx < count; idx++) {
            links.add(anchors.get(idx).attr(LINK_ATTRIBUTE));
        }
        return links;
    }
}
